# Convert a dataset downloaded from Hugging Face (stored in .arrow format) to .json

from datasets import load_from_disk

# Directory of the saved dataset; passed to load_from_disk as a plain path string.
ARROW_DATASET_DIR = "/liub/zyf/Fine-tuning/LMFlow-1.0.0/data/alpaca-gpt4/train"

# Destination of the export; a path string that includes the output file name.
JSON_OUTPUT_PATH = "/liub/zyf/Fine-tuning/LMFlow-1.0.0/data/alpaca-gpt4/train/changed_to_json.json"

# Load the on-disk dataset, then write it out to the JSON file.
# NOTE(review): the output format (JSON Lines vs. a single JSON array) depends on
# to_json's defaults for the installed `datasets` version -- confirm it matches
# what the downstream consumer expects.
tem = load_from_disk(ARROW_DATASET_DIR)
tem.to_json(JSON_OUTPUT_PATH)
